/**
 *  This file is part of FNLP (formerly FudanNLP).
 *
 *  FNLP is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Lesser General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  FNLP is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with FudanNLP.  If not, see <http://www.gnu.org/licenses/>.
 *
 *  Copyright 2009-2014 www.fnlp.org. All rights reserved.
 */

package org.fnlp.demo.nlp;

import org.fnlp.ml.types.Dictionary;
import org.fnlp.nlp.cn.tag.CWSTagger;
import org.fnlp.nlp.cn.tag.POSTagger;

/**
 * 词性标注使用示例
 * @author xpqiu
 *
 */
public class PartsOfSpeechTag {

    static POSTagger tag;

    /**
     * 主程序
     * @param args
     * @throws Exception
     * @throws
     */
    public static void main(String[] args) throws Exception {


        CWSTagger cws = new CWSTagger("./models/seg.m");
        tag = new POSTagger(cws, "./models/pos.m");

        System.out.println("得到支持的词性标签集合");
        System.out.println(tag.getSupportedTags());
        System.out.println(tag.getSupportedTags().size());
        System.out.println("\n");

        String str = "媒体计算研究所成立了，高级数据挖掘很难。乐phone很好！";
        String s = tag.tag("2019年10月10号凌晨2点半，涟水交警接警后，发现现场除了受害者李梅，身份证号500102198802012212，电话18600213245，固定电话02387898765，家住：建胜镇龙桥花苑小区2栋7-3的血迹、一块机动车号牌苏A F001U和苏AF001U少量玻璃碎片外，没有其他有价值的线索。令人悲痛的是，伤者也因伤势过重经抢救无效身亡。请街道协同区环保局、城管局和执法部门进行整改。");
        System.out.println("处理未分词的句子");
        System.out.println(s);

        System.out.println("使用英文标签");
        tag.SetTagType("en");
        System.out.println(tag.getSupportedTags());
        System.out.println(tag.getSupportedTags().size());
        s = tag.tag(str);
        System.out.println(s);
        System.out.println();

        CWSTagger cws2 = new CWSTagger("./models/seg.m", new Dictionary("./models/dict.txt"));

        //bool值指定该dict是否用于cws分词（分词和词性可以使用不同的词典）
        tag = new POSTagger(cws2, "./models/pos.m"
            , new Dictionary("./models/dict.txt"), true);//true就替换了之前的dict.txt
        tag.removeDictionary(false);//不移除分词的词典
        tag.setDictionary(new Dictionary("./models/dict.txt"), false);//设置POS词典，分词使用原来设置

        String str2 = "媒体计算研究所成立了，高级数据挖掘很难。乐phone很好！";
        String s2 = tag.tag(str2);
        System.out.println("处理未分词的句子，使用词典");
        System.out.println(s2);
        System.out.println();

        Dictionary dict = new Dictionary();
        dict.add("媒体计算", "mypos1", "mypos2");
        dict.add("乐phone", "专有名");
        tag.setDictionary(dict, true);
        String s22 = tag.tag(str2);
        System.out.println(s22);
        System.out.println();

        POSTagger tag1 = new POSTagger("./models/pos.m");
        String str1 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难";
        String[] w = str1.split(" ");
        String[] s1 = tag1.tagSeged(w);
        System.out.println("直接处理分好词的句子:++++++++++");
        for (int i = 0; i < s1.length; i++) {
            System.out.print(w[i] + "/" + s1[i] + " ");
        }
        System.out.println("\n");

        POSTagger tag3 = new POSTagger("./models/pos.m", new Dictionary("./models/dict.txt"));
        String str3 = "媒体计算 研究所 成立 了 , 高级 数据挖掘 很 难 ";
        String[] w3 = str3.split(" ");
        String[] s3 = tag3.tagSeged(w3);
        System.out.println("直接处理分好词的句子，使用词典");
        for (int i = 0; i < s3.length; i++) {
            System.out.print(w3[i] + "/" + s3[i] + " ");
        }
        System.out.println("\n");

        //????????????????????????????

        System.out.println("重新构造");
        cws = new CWSTagger("./models/seg.m");
        tag = new POSTagger(cws, "./models/pos.m");
        str = "媒体计算研究所成立了, 高级数据挖掘很难";
        System.out.println(tag.tag(str));
        String[][] sa = tag.tag2Array(str);
        for (int i = 0; i < sa.length; i++) {
            for (int j = 0; j < sa[i].length; j++) {
                System.out.print(sa[i][j] + " ");
            }
            System.out.println();
        }

        String s4 = tag.tagFile("./example-data/data-tag.txt");
        System.out.println("\n处理文件：");
        System.out.println(s4);
    }

}